# -*- coding:utf-8 -*-

from bs4 import BeautifulSoup
import requests


def _is_long_lived(uptime, min_hours=10):
    """Return True when an uptime cell describes a long-lived proxy.

    The page reports uptime as text with a unit suffix. A value expressed
    in days (contains u'天', "day") always qualifies; a value expressed in
    hours (contains u'小时', "hour") qualifies only when it is strictly
    greater than ``min_hours``. Anything else (e.g. minutes, or an hour
    value that is not an integer) does not qualify.
    """
    if u'天' in uptime:
        return True
    if u'小时' in uptime:
        try:
            hours = int(uptime.replace(u'小时', ''))
        except ValueError:
            # Unexpected format: treat as "not long-lived" instead of
            # aborting the whole scrape.
            return False
        return hours > min_hours
    return False


def get_proxy(url='http://www.xicidaili.com/nn', timeout=10):
    """Fetch a proxy-list page and return its long-lived proxies by protocol.

    The page is expected to contain an element with ``id="ip_list"`` whose
    ``<tr>`` rows (after a header row) hold the IP in cell 1, the port in
    cell 2, the protocol in cell 5 and the uptime in the second-to-last
    cell.

    Args:
        url: Address of the proxy-list page to download.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller indefinitely.

    Returns:
        A dict ``{'HTTP': [...], 'HTTPS': [...]}`` where each list holds
        ``(ip, port)`` tuples of strings for proxies whose uptime is given
        in days or exceeds 10 hours. Both lists are empty when the page
        has no ``ip_list`` table.

    Raises:
        Whatever ``requests.get`` raises for connection failures or
        timeouts is propagated unchanged.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
    }
    response = requests.get(url=url, headers=header, timeout=timeout)
    bs = BeautifulSoup(response.text, 'lxml')
    candidate_ip = {'HTTP': [], 'HTTPS': []}

    table = bs.find(attrs={'id': 'ip_list'})
    if table is None:
        # Page layout changed or the request was served an error/blocked
        # page: there is nothing to parse.
        return candidate_ip

    # The first row is the table header, so skip it.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < 6:
            # Not a data row (too few columns to hold the protocol cell).
            continue
        ip = cells[1].text.strip()
        port = cells[2].text.strip()
        protocol = cells[5].text.strip().upper()
        uptime = cells[-2].text.strip()
        if protocol not in candidate_ip:
            # Only HTTP/HTTPS proxies are collected; ignore other kinds
            # rather than failing with KeyError.
            continue
        if _is_long_lived(uptime):
            candidate_ip[protocol].append((ip, port))
    return candidate_ip


if __name__ == '__main__':
    # Script entry point: fetch the default proxy page and dump the result.
    # The call form of print behaves the same on Python 2 for a single
    # argument and, unlike the print statement, also parses on Python 3.
    print(get_proxy())
